# Importing SPC class
from spc import SPC
# Pandas and numpy to create example input time series datasets
import pandas as pd
import numpy as np
"""
Artificial daily data with three distinct process changes, sampled from a Normal distribution.
"""
# Empty dataframes
df_1 = pd.DataFrame()
df_2 = pd.DataFrame()
df_3 = pd.DataFrame()
# Create each dataframe sampling from normal dist. with different params.
df_1['ds'] = pd.date_range(start='2020-01-01', end='2020-03-01', freq='d')
df_1['TARGET'] = np.random.normal(loc=50, scale=10, size=len(df_1))
df_2['ds'] = pd.date_range(start='2020-03-02', end='2020-06-01', freq='d')
df_2['TARGET'] = np.random.normal(loc=40, scale=6, size=len(df_2))
df_3['ds'] = pd.date_range(start='2020-06-02', end='2020-09-01', freq='d')
df_3['TARGET'] = np.random.normal(loc=60, scale=7, size=len(df_3))
# Combine all datasets into one, with clear process changes at 2020-03-02 and 2020-06-02.
df = pd.concat([df_1, df_2, df_3])
# Plot first five rows.
df.head()
| | ds | TARGET |
|---|---|---|
| 0 | 2020-01-01 | 44.056018 |
| 1 | 2020-01-02 | 46.921141 |
| 2 | 2020-01-03 | 24.374112 |
| 3 | 2020-01-04 | 44.622991 |
| 4 | 2020-01-05 | 64.578163 |
In this example, we will create a simple SPC chart. For this example, we will ignore any process changes and plot the SPC chart with only a single calculation for the control lines (i.e., the control lines will take all data into account).
# Build the chart object. `df` is the input dataset created above; 'TARGET' is
# the column to analyse and 'ds' is the date column (both exist within df).
# An XmR chart is used in this example.
spc_example = SPC(
    data_in=df,
    target_col='TARGET',
    date_col='ds',
    chart_type='XmR-chart',
)
# Step 1: calculate the control lines and store the data within the class.
spc_example.setup()
# Step 2: test the SPC rules using the control lines calculated above.
spc_example.check_rules()
# Step 3: plot the resulting chart in Plotly (interactive & downloadable plot).
spc_example.plot_spc(title='Example SPC (XmR-chart) - Basic usage')
# To build our own SPC charts from the data, use the return_data() method.
# It returns 2 dataframes:
# - data for the Variable Control Chart (top chart)
# - data for the Moving Range Control Chart (bottom chart)
spc_data_X, spc_data_Y = spc_example.return_data()
spc_data_X.head()
| | ds | TARGET | cl | lcl | ucl | +1sd | -1sd | +2sd | -2sd | Rule 1 violation | Rule 2 violation | Rule 3 violation | Rule 4 violation | Rule 5 violation | Rule 6 violation | Rule 7 violation | Rule 8 violation | chart type |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-01 | 44.056018 | 50.517474 | 27.636014 | 73.398935 | 58.144628 | 42.890321 | 65.771781 | 35.263167 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Variable Control Chart |
| 1 | 2020-01-02 | 46.921141 | 50.517474 | 27.636014 | 73.398935 | 58.144628 | 42.890321 | 65.771781 | 35.263167 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Variable Control Chart |
| 2 | 2020-01-03 | 24.374112 | 50.517474 | 27.636014 | 73.398935 | 58.144628 | 42.890321 | 65.771781 | 35.263167 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Variable Control Chart |
| 3 | 2020-01-04 | 44.622991 | 50.517474 | 27.636014 | 73.398935 | 58.144628 | 42.890321 | 65.771781 | 35.263167 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Variable Control Chart |
| 4 | 2020-01-05 | 64.578163 | 50.517474 | 27.636014 | 73.398935 | 58.144628 | 42.890321 | 65.771781 | 35.263167 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Variable Control Chart |
# Note that spc_data_Y will be None in SPC charts that only include a single chart;
# calling .head() on None would raise an AttributeError, so only inspect it for
# chart types that return a second dataframe.
spc_data_Y.head()
| | ds | TARGET | r | cl | lcl | ucl | +1sd | -1sd | +2sd | -2sd | Rule 1 violation | Rule 4 violation | Rule 5 violation | Rule 8 violation | chart type |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-01 | 44.056018 | NaN | 8.603429 | 0 | 30.606699 | 1.269006 | 15.937852 | -6.065418 | 23.272276 | 0 | 0 | 0 | 0 | Moving Range Control Chart |
| 1 | 2020-01-02 | 46.921141 | 2.865123 | 8.603429 | 0 | 30.606699 | 1.269006 | 15.937852 | -6.065418 | 23.272276 | 0 | 0 | 0 | 0 | Moving Range Control Chart |
| 2 | 2020-01-03 | 24.374112 | 22.547028 | 8.603429 | 0 | 30.606699 | 1.269006 | 15.937852 | -6.065418 | 23.272276 | 0 | 0 | 0 | 0 | Moving Range Control Chart |
| 3 | 2020-01-04 | 44.622991 | 20.248878 | 8.603429 | 0 | 30.606699 | 1.269006 | 15.937852 | -6.065418 | 23.272276 | 0 | 0 | 0 | 0 | Moving Range Control Chart |
| 4 | 2020-01-05 | 64.578163 | 19.955172 | 8.603429 | 0 | 30.606699 | 1.269006 | 15.937852 | -6.065418 | 23.272276 | 0 | 0 | 0 | 0 | Moving Range Control Chart |
If you know a process change is occurring and want to re-calculate the control lines to reflect the change, you can include the change_dates argument (must be a list of date(s)) in the initialisation, as shown below.
# Same dataset and chart type as the basic example, now passing change_dates.
# change_dates can include as many dates as necessary, but must be in a list.
spc_example = SPC(
    data_in=df,               # data we created above
    target_col='TARGET',      # name of column to analyse
    date_col='ds',            # name of date column
    chart_type='XmR-chart',
    change_dates=['2020-03-02', '2020-06-02'],  # the change dates, given as a list
)
spc_example.setup()
spc_example.check_rules()
spc_example.plot_spc(title='Example SPC (XmR-chart) - Re-calculating control lines')
Now, suppose we didn't know whether a change in the system had an impact on our measured process. Rather than using the change_dates parameter, we can use the baseline_date parameter to calculate control lines only on data before this specified date. This will give an indication of the impact on the measured process following a change to the system.
# Individual chart built with a baseline date instead of change dates.
spc_example = SPC(
    data_in=df,                     # data we created above
    target_col='TARGET',            # name of column to analyse
    date_col='ds',                  # name of date column
    chart_type='Individual-chart',
    baseline_date='2020-03-01',     # the baseline date
)
spc_example.setup()
spc_example.check_rules()
spc_example.plot_spc(title='Example SPC (Individual-chart) - Setting baseline date')
Our examples have used daily data, but data of any frequency can be used.
"""
Artificial hourly data with three distinct process changes, sampled from a Poisson distribution.
"""
df = pd.DataFrame()
df['ds'] = pd.date_range(start='2020-01-01', end='2020-02-01', freq='H')
df['TARGET'] = np.random.poisson(lam = 10, size=len(df))
# Print first few rows
df.head()
| | ds | TARGET |
|---|---|---|
| 0 | 2020-01-01 00:00:00 | 14 |
| 1 | 2020-01-01 01:00:00 | 10 |
| 2 | 2020-01-01 02:00:00 | 7 |
| 3 | 2020-01-01 03:00:00 | 17 |
| 4 | 2020-01-01 04:00:00 | 2 |
# c-chart on the hourly dataset created above; 'TARGET' is the column to
# analyse and 'ds' is the date column (both exist within df).
spc_example = SPC(
    data_in=df,
    target_col='TARGET',
    date_col='ds',
    chart_type='c-chart',
)
# Step 1: calculate the control lines and store the data within the class.
spc_example.setup()
# Step 2: test the SPC rules using the control lines calculated above.
spc_example.check_rules()
# Step 3: plot the resulting chart in Plotly (interactive & downloadable plot).
spc_example.plot_spc(title='Example SPC (c-chart) - Hourly data')
Some SPC charts take averages of samples of data, meaning each day (or hour, week, etc.) has n samples. Therefore, in these cases, we need to include a value for the sample_size argument in the initialisation.
# Creating dataset: two weekly processes, with every date repeated twice so
# that each date carries a sample of size 2.
df_1 = pd.DataFrame()
df_2 = pd.DataFrame()
# The first process stops before 2020-03-01. That date is a Sunday (a weekly
# tick), so ending the first range on it would place it in both processes and
# give it four rows instead of two.
df_1['ds'] = list(pd.date_range(start='2020-01-01', end='2020-02-29', freq='W')) * 2
df_1['TARGET'] = np.random.normal(loc=40, scale=10, size=len(df_1))
# The second process starts on 2020-03-01.
df_2['ds'] = list(pd.date_range(start='2020-03-01', end='2020-09-01', freq='W')) * 2
df_2['TARGET'] = np.random.normal(loc=65, scale=10, size=len(df_2))
# Sort by date so the two samples of each date sit next to each other, then
# renumber the rows 0..n-1.
df = pd.concat([df_1, df_2], axis=0).sort_values(by='ds').reset_index(drop=True)
df.head()
| | ds | TARGET |
|---|---|---|
| 0 | 2020-01-05 | 46.617143 |
| 1 | 2020-01-05 | 32.244393 |
| 2 | 2020-01-12 | 39.718257 |
| 3 | 2020-01-12 | 32.357934 |
| 4 | 2020-01-19 | 38.429614 |
# XbarR chart on the weekly dataset created above; 'TARGET' is the column to
# analyse and 'ds' is the date column (both exist within df).
spc_example = SPC(
    data_in=df,
    target_col='TARGET',
    date_col='ds',
    chart_type='XbarR-chart',
    sample_size=2,                 # specify sample size
    change_dates=['2020-03-01'],
)
# Step 1: calculate the control lines and store the data within the class.
spc_example.setup()
# Step 2: test the SPC rules using the control lines calculated above.
spc_example.check_rules()
# Step 3: plot the resulting chart in Plotly (interactive & downloadable plot).
spc_example.plot_spc(title='Example SPC (X bar R -chart) - Re-calculating control lines')
Generally, a sample size $\geq 5$ qualifies the use of the $\bar{X}S$-chart (which uses the standard deviation rather than the range).
# Weekly data: every weekly date is repeated 10 times, i.e. 10 samples per date.
df = pd.DataFrame()
df['ds'] = list(pd.date_range(start='2020-01-01', end='2020-06-01', freq='W')) * 10
df['TARGET'] = np.random.normal(loc=50, scale=10, size=len(df))
# Sort by date so the samples of each date are grouped together. drop=True
# discards the old row labels instead of adding them as an extra 'index' column.
df = df.sort_values(by='ds').reset_index(drop=True)
# XbarS chart on the dataset created above; 'TARGET' is the column to analyse
# and 'ds' is the date column (both exist within df).
spc_example = SPC(
    data_in=df,
    target_col='TARGET',
    date_col='ds',
    chart_type='XbarS-chart',
    sample_size=10,            # specify constant sample size
)
# Step 1: calculate the control lines and store the data within the class.
spc_example.setup()
# Step 2: test the SPC rules using the control lines calculated above.
spc_example.check_rules()
# Step 3: plot the resulting chart in Plotly (interactive & downloadable plot).
spc_example.plot_spc(title='Example SPC (X bar S -chart) - Basic usage')
With p-charts & u-charts, you'll need to feed in an additional 'n' column, representing the sample size, in order to calculate proportions. Since we need to know n at each position, the baseline approach would no longer be suitable.
# Daily count data with a per-row sample size column 'n'.
df = pd.DataFrame()
# 'D' is the documented daily frequency alias.
df['ds'] = pd.date_range(start='2020-01-01', end='2020-01-15', freq='D')
df['TARGET'] = np.random.randint(5, 20, len(df))
df['n'] = np.random.randint(50, 80, len(df))
# drop=True discards the old row labels instead of adding them as an extra
# 'index' column alongside ds / TARGET / n.
df = df.sort_values(by='ds').reset_index(drop=True)
# p-chart on the dataset created above; 'TARGET' is the column to analyse and
# 'ds' is the date column (both exist within df).
spc_example = SPC(
    data_in=df,
    target_col='TARGET',
    date_col='ds',
    chart_type='p-chart',
)
# Step 1: calculate the control lines and store the data within the class.
spc_example.setup()
# Step 2: test the SPC rules using the control lines calculated above.
spc_example.check_rules()
# Step 3: plot the resulting chart in Plotly (interactive & downloadable plot).
spc_example.plot_spc(title='Example SPC (p-chart) - Basic usage')
# u-chart on the same dataset; 'TARGET' is the column to analyse and 'ds' is
# the date column (both exist within df).
spc_example = SPC(
    data_in=df,
    target_col='TARGET',
    date_col='ds',
    chart_type='u-chart',
)
# Step 1: calculate the control lines and store the data within the class.
spc_example.setup()
# Step 2: test the SPC rules using the control lines calculated above.
spc_example.check_rules()
# Step 3: plot the resulting chart in Plotly (interactive & downloadable plot).
spc_example.plot_spc(title='Example SPC (u-chart) - Basic usage')
# Creating another dataset: two consecutive 15-day periods with different
# count levels and sample sizes.
df_1 = pd.DataFrame()
df_2 = pd.DataFrame()
# 'D' is the documented daily frequency alias; the date ranges are assigned
# directly, without an intermediate list.
df_1['ds'] = pd.date_range(start='2020-01-01', end='2020-01-15', freq='D')
df_2['ds'] = pd.date_range(start='2020-01-16', end='2020-01-30', freq='D')
df_1['TARGET'] = np.random.randint(5, 20, len(df_1))
df_2['TARGET'] = np.random.randint(25, 40, len(df_2))
df_1['n'] = np.random.randint(50, 80, len(df_1))
df_2['n'] = np.random.randint(40, 50, len(df_2))
# Stack the two periods and renumber the rows 0..n-1.
df = pd.concat([df_1, df_2], axis=0).reset_index(drop=True)
df.head()
| | ds | TARGET | n |
|---|---|---|---|
| 0 | 2020-01-01 | 14 | 73 |
| 1 | 2020-01-02 | 8 | 65 |
| 2 | 2020-01-03 | 10 | 55 |
| 3 | 2020-01-04 | 12 | 53 |
| 4 | 2020-01-05 | 11 | 50 |
# u-chart with a process change; 'TARGET' is the column to analyse and 'ds' is
# the date column (both exist within df).
# The second period of the dataset starts on 2020-01-16, so that is the change
# date. This matches the earlier examples, where each change date is the first
# date of the new process rather than the last date of the old one.
spc_example = SPC(
    data_in=df,
    target_col='TARGET',
    date_col='ds',
    chart_type='u-chart',
    change_dates=['2020-01-16'],
)
# Step 1: calculate the control lines and store the data within the class.
spc_example.setup()
# Step 2: test the SPC rules using the control lines calculated above.
spc_example.check_rules()
# Step 3: plot the resulting chart in Plotly (interactive & downloadable plot).
spc_example.plot_spc(title='Example SPC (u-chart) - Recalculating control limits after process change.')